#####################################
##REPLICATION FOR MCGHEE, ET AL.,####
##"A PRIMARY CAUSE OF PARTISANSHIP"##
#####################################

##Table A7 (Appendix)##

#Load Data#
library(Zelig)
library(sandwich)
library(MatchIt)
library(WhatIf)
setwd("XXXX") #ENTER DIRECTORY PATHNAME IN PLACE OF XXXX

# Read the legislator-level primaries file (the ".dem" file) from the
# working directory set above.
legyrd <- read.csv(file = "legislator primaries.dem.csv", header = TRUE)

# ideowt is not needed for this table; remove it so that missing values in
# that column do not cause rows to be dropped by na.omit() below.
legyrd[["ideowt"]] <- NULL

# Election counter: number of two-year steps between 1992 and the election year.
legyrd[["yrcnt"]] <- (legyrd$elec - 1992) / 2

# Listwise deletion: keep only rows with no NA in any remaining column.
legyrd <- na.omit(legyrd)

# State code and election counter become factors so that they enter the
# regressions below as sets of fixed-effect dummies. Conversion happens after
# na.omit() so the factor levels reflect only the retained rows.
legyrd[["yrcnt"]] <- as.factor(legyrd$yrcnt)
legyrd[["icpsrst"]] <- as.factor(legyrd$icpsrst)

##Effect of SCOTUS blanket primary ruling: Democrats##
#AK#
# Treated unit: state code 81 (the state this section labels AK).
# Step 1 removes the other two states analysed in this table (codes 71 and 73).
# Step 2 narrows THAT subset to state 81 plus rows flagged semiop == 1.
# Previously step 2 was taken from the full `legyrd` again, which silently
# discarded step 1; chaining from `legyrd.ak` makes both filters apply.
# NOTE(review): this can change the matched sample and estimates relative to
# output produced by the unchained version -- re-check against the published table.
legyrd.ak <- legyrd[legyrd$icpsrst!=71 & legyrd$icpsrst!=73,]
legyrd.ak <- legyrd.ak[legyrd.ak$icpsrst==81 | legyrd.ak$semiop==1,]

# Indicators: postsc = state 81 AND election after 2000 (the interaction term);
# after00 = election after 2000; ak = state 81.
legyrd.ak$postsc <- as.integer(legyrd.ak$icpsrst==81 & legyrd.ak$elec>2000)
legyrd.ak$after00 <- as.integer(legyrd.ak$elec>2000)
legyrd.ak$ak <- as.integer(legyrd.ak$icpsrst==81)

# Nearest-neighbor matching (MatchIt method="nearest") with `ak` as the
# treatment indicator and pvote and the election counter as covariates.
m.out <- matchit(ak ~ pvote + yrcnt,
	data=legyrd.ak, method="nearest")
summary(m.out)
m.data <- match.data(m.out)

# Least-squares fit on the matched rows with state and election-counter
# factors as covariates; robust= requests the vcovHAC covariance estimator.
z.out <- zelig(pred_np ~ postsc + pvote + icpsrst + yrcnt, 
	robust=list(method="vcovHAC"), model="ls", data=m.data)
summary(z.out)
# Number of rows in the matched sample.
nrow(m.data)

#alternative modeling approach#
# Same matched sample, with group and period dummies (ak, after00) in place
# of the state/year factors and pvote. Overwrites z.out from the fit above.
z.out <- zelig(pred_np ~ postsc + ak + after00, 
	model="ls", data=m.data)
summary(z.out)
nrow(m.data)

#CA#
# Treated unit: state code 71 (the state this section labels CA).
# Step 1 removes the other two states analysed in this table (codes 81 and 73)
# and restricts the window to elections after 1996.
# Step 2 narrows THAT subset to state 71 plus rows flagged semicl == 1.
# Previously step 2 was taken from the full `legyrd` again, which silently
# discarded step 1 (including the elec > 1996 restriction); chaining from
# `legyrd.ca` makes both filters apply.
# NOTE(review): this can change the matched sample and estimates relative to
# output produced by the unchained version -- re-check against the published table.
legyrd.ca <- legyrd[legyrd$icpsrst!=81 & legyrd$icpsrst!=73 & legyrd$elec>1996,]
legyrd.ca <- legyrd.ca[legyrd.ca$icpsrst==71 | legyrd.ca$semicl==1,]

# Indicators: postsc = state 71 AND election after 2000 (the interaction term);
# after00 = election after 2000; ca = state 71.
legyrd.ca$postsc <- as.integer(legyrd.ca$icpsrst==71 & legyrd.ca$elec>2000)
legyrd.ca$after00 <- as.integer(legyrd.ca$elec>2000)
legyrd.ca$ca <- as.integer(legyrd.ca$icpsrst==71)

# Nearest-neighbor matching (MatchIt method="nearest") with `ca` as the
# treatment indicator and pvote and the election counter as covariates.
m.out <- matchit(ca ~ pvote + yrcnt,
	data=legyrd.ca, method="nearest")
summary(m.out)
m.data <- match.data(m.out)

# Least-squares fit on the matched rows with state and election-counter
# factors as covariates; robust= requests the vcovHAC covariance estimator.
z.out <- zelig(pred_np ~ postsc + pvote + icpsrst + yrcnt, 
	robust=list(method="vcovHAC"), model="ls", data=m.data)
summary(z.out)
# Number of rows in the matched sample.
nrow(m.data)

#alternative modeling approach#
# Same matched sample, with group and period dummies (ca, after00) in place
# of the state/year factors and pvote. Overwrites z.out from the fit above.
z.out <- zelig(pred_np ~ postsc + ca + after00, 
	model="ls", data=m.data)
summary(z.out)
nrow(m.data)

#WA#
# Treated unit: state code 73 (the state this section labels WA).
# Step 1 removes the other two states analysed in this table (codes 81 and 71)
# and restricts the window to elections before 2008.
# Step 2 narrows THAT subset to state 73 plus rows flagged open == 1.
# Previously step 2 was taken from the full `legyrd` again, which silently
# discarded step 1 (including the elec < 2008 restriction); chaining from
# `legyrd.wa` makes both filters apply.
# NOTE(review): this can change the matched sample and estimates relative to
# output produced by the unchained version -- re-check against the published table.
legyrd.wa <- legyrd[legyrd$icpsrst!=81 & legyrd$icpsrst!=71 & legyrd$elec<2008,]
legyrd.wa <- legyrd.wa[legyrd.wa$icpsrst==73 | legyrd.wa$open==1,]

# Indicators: postsc = state 73 AND election after 2002 (the interaction term);
# after02 = election after 2002; wa = state 73.
# Note the cutoff here is 2002, not 2000 as in the other two state sections.
legyrd.wa$postsc <- as.integer(legyrd.wa$icpsrst==73 & legyrd.wa$elec>2002)
legyrd.wa$after02 <- as.integer(legyrd.wa$elec>2002)
legyrd.wa$wa <- as.integer(legyrd.wa$icpsrst==73)

# Nearest-neighbor matching (MatchIt method="nearest") with `wa` as the
# treatment indicator and pvote and the election counter as covariates.
m.out <- matchit(wa ~ pvote + yrcnt,
	data=legyrd.wa, method="nearest")
summary(m.out)
m.data <- match.data(m.out)

# Least-squares fit on the matched rows with state and election-counter
# factors as covariates; robust= requests the vcovHAC covariance estimator.
z.out <- zelig(pred_np ~ postsc + pvote + icpsrst + yrcnt, 
	robust=list(method="vcovHAC"), model="ls", data=m.data)
summary(z.out)
# Number of rows in the matched sample.
nrow(m.data)

#alternative modeling approach#
# Same matched sample, with group and period dummies (wa, after02) in place
# of the state/year factors and pvote. Overwrites z.out from the fit above.
z.out <- zelig(pred_np ~ postsc + wa + after02, 
	model="ls", data=m.data)
summary(z.out)
nrow(m.data)



